{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "“TextCNN-Torch.ipynb”的副本",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "sjNbhLpVNceY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "'''\n",
        "  code by Tae Hwan Jung(Jeff Jung) @graykode, modified by wmathor\n",
        "'''\n",
        "import torch\n",
        "import numpy as np\n",
        "import torch.nn as nn\n",
        "import torch.optim as optim\n",
        "import torch.utils.data as Data\n",
        "import torch.nn.functional as F\n",
        "\n",
        "# CPU float tensor type; no other cell in this notebook references it.\n",
        "dtype = torch.FloatTensor\n",
        "# Use the GPU when CUDA is available, otherwise fall back to the CPU.\n",
        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5jaRDe00eqZZ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Toy corpus: every sentence has exactly three words (sequence_length is 3).\n",
        "sentences = [\"i love you\", \"he loves me\", \"she likes baseball\", \"i hate you\", \"sorry for that\", \"this is awful\"]\n",
        "labels = [1, 1, 1, 0, 0, 0]  # 1 is good, 0 is not good.\n",
        "\n",
        "# TextCNN Parameter\n",
        "embedding_size = 2 # wordemb dim\n",
        "# Count words, not characters: len(sentences[0]) is 10 for \"i love you\".\n",
        "sequence_length = len(sentences[0].split()) # every sentence contains sequence_length(=3) words\n",
        "num_classes = len(set(labels))  # 0 or 1\n",
        "batch_size = 3\n",
        "\n",
        "word_list = \" \".join(sentences).split()\n",
        "# Sort the vocabulary so each word keeps the same index across kernel restarts;\n",
        "# the iteration order of a set of strings changes with hash randomization.\n",
        "vocab = sorted(set(word_list))\n",
        "word2idx = {w: i for i, w in enumerate(vocab)}\n",
        "vocab_size = len(vocab)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "O6kGPKVrNiJJ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def make_data(sentences, labels):\n",
        "  '''Turn sentences into word-index lists and copy the labels.\n",
        "\n",
        "  Each sentence is split on whitespace and every word is looked up in the\n",
        "  module-level word2idx mapping. Returns (inputs, targets): inputs is a\n",
        "  list with one list of indices per sentence, targets is a new list of\n",
        "  the labels in their original order (plain class indices).\n",
        "  '''\n",
        "  inputs = [[word2idx[word] for word in sentence.split()] for sentence in sentences]\n",
        "  targets = list(labels)  # class indices as-is, no one-hot encoding\n",
        "  return inputs, targets\n",
        "\n",
        "# Encode the corpus, then wrap the index lists and the labels as LongTensors.\n",
        "input_batch, target_batch = map(torch.LongTensor, make_data(sentences, labels))\n",
        "\n",
        "# Pair inputs with targets and serve them in shuffled mini-batches.\n",
        "dataset = Data.TensorDataset(input_batch, target_batch)\n",
        "loader = Data.DataLoader(dataset, batch_size=batch_size, shuffle=True)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dzhILRnNVS-a",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "class TextCNN(nn.Module):\n",
        "    '''Minimal TextCNN: embedding -> one 2-word convolution -> max-over-time pooling -> linear layer.\n",
        "\n",
        "    The constructor reads the module-level vocab_size, embedding_size and num_classes.\n",
        "    '''\n",
        "    def __init__(self):\n",
        "        super(TextCNN, self).__init__()\n",
        "        self.W = nn.Embedding(vocab_size, embedding_size)\n",
        "        output_channel = 3\n",
        "        self.conv = nn.Sequential(\n",
        "            # conv : [input_channel(=1), output_channel, (filter_height, filter_width), stride=1]\n",
        "            nn.Conv2d(1, output_channel, (2, embedding_size)), # => [batch_size, output_channel, sequence_length-1, 1]\n",
        "            nn.ReLU(),\n",
        "            # pool : max over all window positions, so any sequence_length >= 2 gives\n",
        "            # [batch_size, output_channel, 1, 1]; for 3-word input this equals MaxPool2d((2, 1))\n",
        "            nn.AdaptiveMaxPool2d((1, 1)),\n",
        "        )\n",
        "        # fc\n",
        "        self.fc = nn.Linear(output_channel, num_classes)\n",
        "\n",
        "    def forward(self, X):\n",
        "        '''\n",
        "        X: [batch_size, sequence_length] tensor of word indices\n",
        "        returns: [batch_size, num_classes] unnormalized class scores\n",
        "        '''\n",
        "        n_samples = X.shape[0] # local name, so the global batch_size is not shadowed\n",
        "        embedding_X = self.W(X) # [batch_size, sequence_length, embedding_size]\n",
        "        embedding_X = embedding_X.unsqueeze(1) # add channel(=1) [batch, channel(=1), sequence_length, embedding_size]\n",
        "        conved = self.conv(embedding_X) # [batch_size, output_channel, 1, 1]\n",
        "        flatten = conved.view(n_samples, -1) # [batch_size, output_channel*1*1]\n",
        "        output = self.fc(flatten)\n",
        "        return output"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "_KQFe0ZZVsJS",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 185
        },
        "outputId": "dfce89a5-62b2-40e3-b7b9-4561064ae27b"
      },
      "source": [
        "# Network, loss and optimizer, all placed on the selected device.\n",
        "model = TextCNN().to(device)\n",
        "criterion = nn.CrossEntropyLoss().to(device)\n",
        "optimizer = optim.Adam(model.parameters(), lr=1e-3)\n",
        "\n",
        "# Training: 5000 epochs over the shuffled loader. On every 1000th epoch the\n",
        "# loss of each mini-batch is printed, so those epochs produce one line per batch.\n",
        "for epoch in range(5000):\n",
        "    for batch_x, batch_y in loader:\n",
        "        batch_x = batch_x.to(device)\n",
        "        batch_y = batch_y.to(device)\n",
        "\n",
        "        # Clear old gradients first; the forward pass does not depend on them.\n",
        "        optimizer.zero_grad()\n",
        "        pred = model(batch_x)\n",
        "        loss = criterion(pred, batch_y)\n",
        "        if not (epoch + 1) % 1000:\n",
        "            print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss))\n",
        "\n",
        "        loss.backward()\n",
        "        optimizer.step()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Epoch: 1000 loss = 0.001498\n",
            "Epoch: 1000 loss = 0.001487\n",
            "Epoch: 2000 loss = 0.000220\n",
            "Epoch: 2000 loss = 0.000215\n",
            "Epoch: 3000 loss = 0.000058\n",
            "Epoch: 3000 loss = 0.000050\n",
            "Epoch: 4000 loss = 0.000023\n",
            "Epoch: 4000 loss = 0.000009\n",
            "Epoch: 5000 loss = 0.000005\n",
            "Epoch: 5000 loss = 0.000005\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "r6fMUzoKQP6O",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "a87cb734-4375-4bea-a766-48d6e0b39578"
      },
      "source": [
        "# Test\n",
        "test_text = 'i hate me'\n",
        "# Every word must already be in word2idx; an unseen word raises KeyError.\n",
        "tests = [[word2idx[n] for n in test_text.split()]]\n",
        "test_batch = torch.LongTensor(tests).to(device)\n",
        "# Predict\n",
        "model = model.eval()\n",
        "# Inference needs no autograd graph, so no_grad replaces the legacy `.data` access.\n",
        "with torch.no_grad():\n",
        "    predict = model(test_batch).argmax(dim=1, keepdim=True) # [1, 1] index of the highest score\n",
        "if predict[0][0] == 0:\n",
        "    print(test_text,\"is Bad Mean...\")\n",
        "else:\n",
        "    print(test_text,\"is Good Mean!!\")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "i hate me is Bad Mean...\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LNREgu9WARks",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}